
***************************************************************************************************************
/*
THIS DO-FILE:
1. Gets RAND kids data, merges on other helpers
2. Generates lots of variables
3. Creates kidsandhelpers_sample.dta 
4. Also creates kidinfo.dta for figures
*/
***************************************************************************************************************

* Session setup: disable output paging, start from an empty session, and
* raise the variable limit (maxvar can only be changed with no data in memory).
set more off
clear all
set maxvar 20000

***************************************************************************************************************


*PREPARE KID FILE
* Load the RAND family kid-level file, keep the identifiers plus the kid
* variables used below, convert kidid to numeric, and park the result in a
* tempfile so it can be merged onto the helper files.
* (earlier release, kept for reference: "`hrs'\rndfamC_stata\StataSE\rndfamk_c.dta")
* (krrel is deliberately not kept)
tempfile kidsfile
use "$rawdata\randhrsfam1992_2014v1_STATA\randhrsfamk1992_2014v1.dta", clear
keep hhid pn hhidpn opn kidid inw* ///
	kaeduc kagenderbg kabyearbg *hlpfut* *hlphrs *work *incb *mstat *nkid *agebg *ownhm *resd *lvnear *kdcare *will *tcany *tcamt *fcany *ind
destring kidid, replace
save `kidsfile', replace

*MERGE HELPER FILES AND KID FILE TOGETHER
* Start from the wave-4 helper file, add the wave 5-12 helper files one at a
* time on (hhidpn, kidid), then merge the kid file and keep the merge
* indicator in kidmerge (used to classify rows below).
use "allADLIADLhelpers_4", clear
forvalues w = 5/12 {
	merge 1:1 hhidpn kidid using "allADLIADLhelpers_`w'", nogen
}
merge 1:1 hhidpn kidid using `kidsfile', gen(kidmerge)

*kid/helper classification, from the merge indicator kidmerge:
*  kidmerge==1 (helper files only)        -> non-kid helper
*  kidmerge==2 (kid file only)            -> kid who does not provide help
*  kidmerge==3 (both helper and kid file) -> kid who provides help
generate kidhelper    = kidmerge == 3
generate kidnonhelper = kidmerge == 2
generate nonkidhelper = kidmerge == 1

*DATA CLEANING
* For waves 4-12: build 0/1 indicators from a few kid variables and copy the
* remaining ones into consistently named <stem><wave> variables.

* New-name stems and the RAND suffixes they are copied from (paired by position).
* khelphours: didn't end up using RAND definition (used my own for consistency with non-kid hours)
local stems    kinw kmstat kkids kage kownhome kresd khelphours kinc kwill kanyxfertokid kamtxfertokid kanyxferfromkid
local suffixes ind mstat nkid agebg ownhm resd hlphrs incb will tcany tcamt fcany
local npairs : word count `stems'

forvalues w = 4/12 {
	* whether kid lives nearby (defined only for the listed lvnear codes)
	generate kidnear`w' = inlist(k`w'lvnear, 1, 4, 5) if inlist(k`w'lvnear, .c, 1, 4, 5, 6, 8, 10)
	generate kidnearest`w' = inlist(k`w'lvnear, 1, 4, 5, 6) if inlist(k`w'lvnear, .c, 1, 4, 5, 6, 8, 10) // within 10 miles or nearest

	* work status dummies: work code 0 -> NT (doesn't work), 1 -> PT, 2 -> FT
	local code = 0
	foreach status in NT PT FT {
		generate kidwork`status'`w' = (k`w'work == `code') if inlist(k`w'work, 0, 1, 2) & inw`w' == 1
		local ++code
	}

	generate grandkidcare`w' = (k`w'kdcare == 1) if inlist(k`w'kdcare, 0, 1) & inw`w' == 1

	* straight copies under the new naming scheme
	forvalues i = 1/`npairs' {
		local stem : word `i' of `stems'
		local suffix : word `i' of `suffixes'
		generate `stem'`w' = k`w'`suffix'
	}
}

* gender dummies (defined only when gender is coded 1 or 2)
generate kmale   = (kagenderbg == 1) if inlist(kagenderbg, 1, 2)
generate kfemale = (kagenderbg == 2) if inlist(kagenderbg, 1, 2)

* education: dummies for <12 and ==12 years, plus a 3-bin version
* (1: <12, 2: ==12, 3: 13-17; missing otherwise)
generate keducLTHS = (kaeduc < 12) if kaeduc < .
generate keducHS   = (kaeduc == 12) if kaeduc < .
generate keducbin  = cond(kaeduc < 12, 1, cond(kaeduc == 12, 2, cond(kaeduc > 12 & kaeduc <= 17, 3, .)))

//skipped for reinterviews: fill from the previous wave
replace kkids4 = k3nkid if kkids4 >= .
replace kownhome8 = kownhome7 if kownhome8 >= .


*CHILD SELECTION
* Picks one child per respondent (hhidpn) by applying tie-breaking criteria in
* sequence. At each level k, maxchild<k>==1 flags the children still in
* contention and nmaxchild<k> is how many such children the respondent has.
* A level is only applied when the previous level left more than one child;
* when a level flags nobody (because the previous level already resolved the
* pick) its count is carried forward from the previous level.
* Outputs: selectedkidid, selection_criterion (1-5), selectedkid (0/1),
* numselectedkids; the dataset is saved to kidsandhelpers_sample.dta.

gen maxchild0=1 if kidhelper==1 | kidnonhelper==1
bys hhidpn: egen nmaxchild0=sum(maxchild0) if kidhelper==1 | kidnonhelper==1

*1st criterion: most hours overall (use my definition instead of RAND to be more consistent with non-kid helper definition)
egen helphours_overall=rowtotal(helper_hpm4 helper_hpm5 helper_hpm6 helper_hpm7 helper_hpm8 helper_hpm9 helper_hpm10 helper_hpm11 helper_hpm12) if kidhelper==1 | kidnonhelper==1
*egen helphours_overall=rowtotal(khelphours4 khelphours5 khelphours6 khelphours7 khelphours8 khelphours9 khelphours10) if kidhelper==1 | kidnonhelper==1
bys hhidpn: egen helphours_overall_max=max(helphours_overall) if kidhelper==1 | kidnonhelper==1
gen maxchild1=1 if helphours_overall==helphours_overall_max & (kidhelper==1 | kidnonhelper==1)
bys hhidpn: egen nmaxchild1=sum(maxchild1) if kidhelper==1 | kidnonhelper==1

*2nd criterion: lives nearest for longest timespan (number of waves flagged kidnearest)
egen kidnearest_overall=rowtotal(kidnearest4 kidnearest5 kidnearest6 kidnearest7 kidnearest8 kidnearest9 kidnearest10 kidnearest11 kidnearest12) if maxchild1==1
bys hhidpn: egen kidnearest_overall_max=max(kidnearest_overall) if maxchild1==1
gen maxchild2=1 if kidnearest_overall==kidnearest_overall_max & maxchild1==1 & nmaxchild1>1
bys hhidpn: egen nmaxchild2=sum(maxchild2) if kidhelper==1 | kidnonhelper==1
replace nmaxchild2=nmaxchild1 if nmaxchild2==0

*3rd criterion: daughter (max of the gender code among tied kids; kfemale is defined as code 2)
bys hhidpn: egen isdaughter=max(kagenderbg) if maxchild2==1
gen maxchild3=1 if kagenderbg==isdaughter & maxchild2==1 & nmaxchild2>1
bys hhidpn: egen nmaxchild3=sum(maxchild3) if kidhelper==1 | kidnonhelper==1
replace nmaxchild3=nmaxchild2 if nmaxchild3==0

*4th criterion: oldest (earliest birth year among tied kids)
bys hhidpn: egen oldest=min(kabyearbg) if maxchild3==1
gen maxchild4=1 if kabyearbg==oldest & maxchild3==1 & nmaxchild3>1
bys hhidpn: egen nmaxchild4=sum(maxchild4) if kidhelper==1 | kidnonhelper==1
replace nmaxchild4=nmaxchild3 if nmaxchild4==0

* Record the pick at the first level that leaves exactly one child.
gen selection_criterion=.
gen 	selectedkidid=kidid   if maxchild1==1 & nmaxchild1==1
replace selection_criterion=1 if maxchild1==1 & nmaxchild1==1
replace selectedkidid=kidid   if maxchild2==1 & nmaxchild2==1 & selectedkidid==.
replace selection_criterion=2 if maxchild2==1 & nmaxchild2==1 & selection_criterion==.
replace selectedkidid=kidid   if maxchild3==1 & nmaxchild3==1 & selectedkidid==.
replace selection_criterion=3 if maxchild3==1 & nmaxchild3==1 & selection_criterion==.
replace selectedkidid=kidid   if maxchild4==1 & nmaxchild4==1 & selectedkidid==.
replace selection_criterion=4 if maxchild4==1 & nmaxchild4==1 & selection_criterion==.

*see if one and only one selectedkid per hhidpn
* (diagnostic only: counted BEFORE the random tie-break below)
bys hhidpn: egen numselectedkids=count(selectedkidid)

*still ~2% of sample doesn't have selected kid (mostly cuz of missings, eg missing birthdates)
*so just pick randomly from kids still tied
* The draw is restricted to children who survived every criterion (maxchild4==1).
* Taking the first row of the whole hhidpn group instead could select a child
* who was already eliminated by an earlier criterion. The draw is made
* reproducible by sorting on the unique key (hhidpn kidid) and fixing the seed
* before generating the random order.
tempvar tied draw
sort hhidpn kidid
set seed 20140101
gen double `draw'=runiform()
gen byte `tied'=(maxchild4==1 & nmaxchild4>1 & nmaxchild4<. & selectedkidid==.)
bys hhidpn `tied' (`draw'): replace selection_criterion=5 if `tied'==1 & _n==1
bys hhidpn `tied' (`draw'): replace selectedkidid=kidid   if `tied'==1 & _n==1
drop `tied' `draw'
sort hhidpn kidid

* 1 for the selected child, 0 for every other child, missing for non-kid helpers
gen selectedkid=1 if selectedkidid!=.
replace selectedkid=0 if selectedkid==. & (kidhelper==1 | kidnonhelper==1)

save "kidsandhelpers_sample.dta", replace
***************************************************************************************************************

***************************************************************************************************************
*get kid helper information for figures
* Collapses the kid-level sample to one row per respondent (hhidpn) with, for
* each wave 4-12, the household-level maximum of:
*   hasdaughter  - any daughter among kids with kinw==1
*   kidinres     - any kid with kresd>0 (recoded to 1)
*   dauginres    - any daughter with resd coded 1 or 2
*   kid10m       - any kid flagged kidnear
*   daug10m      - any daughter flagged kidnear
*   kidhlpfut    - max of the kids' hlpfut values
*   daughlpfut   - any daughter with hlpfut==1
* Saved to kidinfo.dta.
use "kidsandhelpers_sample.dta", clear
forval x=4/12 {
	gen kdaughter`x'=(kfemale==1) if kinw`x'==1 & inlist(kfemale,0,1)
	bys hhidpn: egen hasdaughter`x'=max(kdaughter`x')
	
	bys hhidpn: egen kidinres`x'=max(kresd`x')
	replace kidinres`x'=1 if kidinres`x'>0 & kidinres`x'!=.
	* guard with <. so extended missing codes (.a-.z) in the raw variable are
	* treated as missing rather than as a valid "not resident" zero
	gen tempfemres`x'=(inlist(k`x'resd,1,2) & kdaughter`x'==1) if k`x'resd<. & kdaughter`x'<.
	bys hhidpn: egen dauginres`x'=max(tempfemres`x')
	
	bys hhidpn: egen kid10m`x'=max(kidnear`x')
	gen tempfem10m`x'=(kidnear`x'==1 & kdaughter`x'==1) if kidnear`x'<. & kdaughter`x'<.
	bys hhidpn: egen daug10m`x'=max(tempfem10m`x')
	
	bys hhidpn: egen kidhlpfut`x'=max(k`x'hlpfut)
	* use the kid's OWN hlpfut value here, not the household max computed on
	* the previous line; otherwise a son's hlpfut==1 would be credited to every
	* daughter in the household (the resd and 10m flags are also kid-level)
	gen tempfemhlpfut`x'=(k`x'hlpfut==1 & kdaughter`x'==1) if k`x'hlpfut<. & kdaughter`x'<.
	bys hhidpn: egen daughlpfut`x'=max(tempfemhlpfut`x')
}

* every kept variable is constant within hhidpn, so dropping duplicates leaves
* one row per respondent
keep hhidpn hasdaughter* kidinres* dauginres* kid10m* daug10m* kidhlpfut* daughlpfut*
duplicates drop
save "kidinfo.dta", replace
***************************************************************************************************************
